.text

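# ossl_rsaz_avxifma_eligible() - capability probe: returns non-zero when the
# CPU advertises AVX-IFMA (the VEX-encoded 52-bit integer FMA extension this
# module relies on), as recorded in OPENSSL_ia32cap_P at cpuid setup time.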
.globl  ossl_rsaz_avxifma_eligible
.type   ossl_rsaz_avxifma_eligible,@function
.align  32
ossl_rsaz_avxifma_eligible:
        movl    OPENSSL_ia32cap_P+20(%rip),%ecx # capability word 5
        xorl    %eax,%eax
        andl    $8388608,%ecx                   # 1<<23: AVX-IFMA feature bit
        cmpl    $8388608,%ecx
        cmovel  %ecx,%eax                       # non-zero iff AVX-IFMA present
        .byte   0xf3,0xc3                       # repz ret
.size   ossl_rsaz_avxifma_eligible, .-ossl_rsaz_avxifma_eligible
.text

.globl  ossl_rsaz_amm52x20_x1_avxifma256
.type   ossl_rsaz_amm52x20_x1_avxifma256,@function
.align  32
ossl_rsaz_amm52x20_x1_avxifma256:
.cfi_startproc
.byte   243,15,30,250                           # endbr64
        pushq   %rbx
.cfi_adjust_cfa_offset  8
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_adjust_cfa_offset  8
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_adjust_cfa_offset  8
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_adjust_cfa_offset  8
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_adjust_cfa_offset  8
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_adjust_cfa_offset  8
.cfi_offset     %r15,-56
.Lossl_rsaz_amm52x20_x1_avxifma256_body:
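# ossl_rsaz_amm52x20_x1_avxifma256(res=rdi, a=rsi, b=rdx, m=rcx, k0=r8):
# Almost Montgomery Multiplication on 1024-bit values held as 20 limbs of
# 52 bits (base 2^52, one limb per 64-bit lane), i.e. roughly
# res = a*b*2^-1040 mod m, where the result may be not fully reduced.
# The 20-limb accumulator lives in ymm3,ymm5..ymm8, four limbs apiece.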


        vpxor   %ymm0,%ymm0,%ymm0
        vmovapd %ymm0,%ymm3
        vmovapd %ymm0,%ymm5
        vmovapd %ymm0,%ymm6
        vmovapd %ymm0,%ymm7
        vmovapd %ymm0,%ymm8

        xorl    %r9d,%r9d                       # r9 = zeroth-limb carry accumulator

        movq    %rdx,%r11                       # b (rdx itself is clobbered by mulx)
        movq    $0xfffffffffffff,%rax           # 2^52 - 1, the limb mask


        movl    $5,%ebx                         # 5 passes x 4 b-limbs = 20 limbs

.align  32
.Lloop5:
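# The loop body is unrolled 4x: each pass consumes b[i..i+3], doing for each
# limb one scalar Montgomery step on acc[0] plus the vector IFMA updates.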
        movq    0(%r11),%r13                    # r13 = b[i]

        vpbroadcastq    0(%r11),%ymm1           # ymm1 = b[i] in all lanes
        movq    0(%rsi),%rdx                    # rdx = a[0]
        mulxq   %r13,%r13,%r12                  # r13:r12 = lo:hi of a[0]*b[i]
        addq    %r13,%r9                        # acc[0] += lo(a[0]*b[i])
        movq    %r12,%r10
        adcq    $0,%r10

        movq    %r8,%r13                        # k0 (passed by value in the x1 variant)
        imulq   %r9,%r13                        # y = k0*acc[0] ...
        andq    %rax,%r13                       # ... mod 2^52

        vmovq   %r13,%xmm2
        vpbroadcastq    %xmm2,%ymm2             # ymm2 = y in all lanes
        movq    0(%rcx),%rdx                    # rdx = m[0]
        mulxq   %r13,%r13,%r12                  # r13:r12 = lo:hi of m[0]*y
        addq    %r13,%r9                        # acc[0] += lo(m[0]*y); low 52 bits now zero
        adcq    %r12,%r10

        shrq    $52,%r9                         # carry out of limb 0 ...
        salq    $12,%r10
        orq     %r10,%r9                        # ... merged with the two high product halves

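# Accumulate the low 52-bit product halves; the {vex} prefix forces the
# VEX-encoded AVX-IFMA form of vpmadd52[lh]uq rather than EVEX (AVX-512).
# acc[j] += lo52(a[j]*b[i]) + lo52(m[j]*y), four limbs per register.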
        leaq    -168(%rsp),%rsp
{vex}   vpmadd52luq     0(%rsi),%ymm1,%ymm3
{vex}   vpmadd52luq     32(%rsi),%ymm1,%ymm5
{vex}   vpmadd52luq     64(%rsi),%ymm1,%ymm6
{vex}   vpmadd52luq     96(%rsi),%ymm1,%ymm7
{vex}   vpmadd52luq     128(%rsi),%ymm1,%ymm8

{vex}   vpmadd52luq     0(%rcx),%ymm2,%ymm3
{vex}   vpmadd52luq     32(%rcx),%ymm2,%ymm5
{vex}   vpmadd52luq     64(%rcx),%ymm2,%ymm6
{vex}   vpmadd52luq     96(%rcx),%ymm2,%ymm7
{vex}   vpmadd52luq     128(%rcx),%ymm2,%ymm8


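# Shift the accumulator right by one limb: spill it with a zero qword
# appended, reload 8 bytes up, then fold the high 52-bit product halves
# into the shifted value. This is the divide-by-2^52 step of each Montgomery
# iteration; the same spill/reload sequence recurs for every b-limb below.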
        vmovdqu %ymm3,0(%rsp)
        vmovdqu %ymm5,32(%rsp)
        vmovdqu %ymm6,64(%rsp)
        vmovdqu %ymm7,96(%rsp)
        vmovdqu %ymm8,128(%rsp)
        movq    $0,160(%rsp)

        vmovdqu 8(%rsp),%ymm3
        vmovdqu 40(%rsp),%ymm5
        vmovdqu 72(%rsp),%ymm6
        vmovdqu 104(%rsp),%ymm7
        vmovdqu 136(%rsp),%ymm8

        addq    8(%rsp),%r9

{vex}   vpmadd52huq     0(%rsi),%ymm1,%ymm3
{vex}   vpmadd52huq     32(%rsi),%ymm1,%ymm5
{vex}   vpmadd52huq     64(%rsi),%ymm1,%ymm6
{vex}   vpmadd52huq     96(%rsi),%ymm1,%ymm7
{vex}   vpmadd52huq     128(%rsi),%ymm1,%ymm8

{vex}   vpmadd52huq     0(%rcx),%ymm2,%ymm3
{vex}   vpmadd52huq     32(%rcx),%ymm2,%ymm5
{vex}   vpmadd52huq     64(%rcx),%ymm2,%ymm6
{vex}   vpmadd52huq     96(%rcx),%ymm2,%ymm7
{vex}   vpmadd52huq     128(%rcx),%ymm2,%ymm8
        leaq    168(%rsp),%rsp
        movq    8(%r11),%r13

        vpbroadcastq    8(%r11),%ymm1
        movq    0(%rsi),%rdx
        mulxq   %r13,%r13,%r12
        addq    %r13,%r9
        movq    %r12,%r10
        adcq    $0,%r10

        movq    %r8,%r13
        imulq   %r9,%r13
        andq    %rax,%r13

        vmovq   %r13,%xmm2
        vpbroadcastq    %xmm2,%ymm2
        movq    0(%rcx),%rdx
        mulxq   %r13,%r13,%r12
        addq    %r13,%r9
        adcq    %r12,%r10

        shrq    $52,%r9
        salq    $12,%r10
        orq     %r10,%r9

        leaq    -168(%rsp),%rsp
{vex}   vpmadd52luq     0(%rsi),%ymm1,%ymm3
{vex}   vpmadd52luq     32(%rsi),%ymm1,%ymm5
{vex}   vpmadd52luq     64(%rsi),%ymm1,%ymm6
{vex}   vpmadd52luq     96(%rsi),%ymm1,%ymm7
{vex}   vpmadd52luq     128(%rsi),%ymm1,%ymm8

{vex}   vpmadd52luq     0(%rcx),%ymm2,%ymm3
{vex}   vpmadd52luq     32(%rcx),%ymm2,%ymm5
{vex}   vpmadd52luq     64(%rcx),%ymm2,%ymm6
{vex}   vpmadd52luq     96(%rcx),%ymm2,%ymm7
{vex}   vpmadd52luq     128(%rcx),%ymm2,%ymm8


        vmovdqu %ymm3,0(%rsp)
        vmovdqu %ymm5,32(%rsp)
        vmovdqu %ymm6,64(%rsp)
        vmovdqu %ymm7,96(%rsp)
        vmovdqu %ymm8,128(%rsp)
        movq    $0,160(%rsp)

        vmovdqu 8(%rsp),%ymm3
        vmovdqu 40(%rsp),%ymm5
        vmovdqu 72(%rsp),%ymm6
        vmovdqu 104(%rsp),%ymm7
        vmovdqu 136(%rsp),%ymm8

        addq    8(%rsp),%r9

{vex}   vpmadd52huq     0(%rsi),%ymm1,%ymm3
{vex}   vpmadd52huq     32(%rsi),%ymm1,%ymm5
{vex}   vpmadd52huq     64(%rsi),%ymm1,%ymm6
{vex}   vpmadd52huq     96(%rsi),%ymm1,%ymm7
{vex}   vpmadd52huq     128(%rsi),%ymm1,%ymm8

{vex}   vpmadd52huq     0(%rcx),%ymm2,%ymm3
{vex}   vpmadd52huq     32(%rcx),%ymm2,%ymm5
{vex}   vpmadd52huq     64(%rcx),%ymm2,%ymm6
{vex}   vpmadd52huq     96(%rcx),%ymm2,%ymm7
{vex}   vpmadd52huq     128(%rcx),%ymm2,%ymm8
        leaq    168(%rsp),%rsp
        movq    16(%r11),%r13

        vpbroadcastq    16(%r11),%ymm1
        movq    0(%rsi),%rdx
        mulxq   %r13,%r13,%r12
        addq    %r13,%r9
        movq    %r12,%r10
        adcq    $0,%r10

        movq    %r8,%r13
        imulq   %r9,%r13
        andq    %rax,%r13

        vmovq   %r13,%xmm2
        vpbroadcastq    %xmm2,%ymm2
        movq    0(%rcx),%rdx
        mulxq   %r13,%r13,%r12
        addq    %r13,%r9
        adcq    %r12,%r10

        shrq    $52,%r9
        salq    $12,%r10
        orq     %r10,%r9

        leaq    -168(%rsp),%rsp
{vex}   vpmadd52luq     0(%rsi),%ymm1,%ymm3
{vex}   vpmadd52luq     32(%rsi),%ymm1,%ymm5
{vex}   vpmadd52luq     64(%rsi),%ymm1,%ymm6
{vex}   vpmadd52luq     96(%rsi),%ymm1,%ymm7
{vex}   vpmadd52luq     128(%rsi),%ymm1,%ymm8

{vex}   vpmadd52luq     0(%rcx),%ymm2,%ymm3
{vex}   vpmadd52luq     32(%rcx),%ymm2,%ymm5
{vex}   vpmadd52luq     64(%rcx),%ymm2,%ymm6
{vex}   vpmadd52luq     96(%rcx),%ymm2,%ymm7
{vex}   vpmadd52luq     128(%rcx),%ymm2,%ymm8


        vmovdqu %ymm3,0(%rsp)
        vmovdqu %ymm5,32(%rsp)
        vmovdqu %ymm6,64(%rsp)
        vmovdqu %ymm7,96(%rsp)
        vmovdqu %ymm8,128(%rsp)
        movq    $0,160(%rsp)

        vmovdqu 8(%rsp),%ymm3
        vmovdqu 40(%rsp),%ymm5
        vmovdqu 72(%rsp),%ymm6
        vmovdqu 104(%rsp),%ymm7
        vmovdqu 136(%rsp),%ymm8

        addq    8(%rsp),%r9

{vex}   vpmadd52huq     0(%rsi),%ymm1,%ymm3
{vex}   vpmadd52huq     32(%rsi),%ymm1,%ymm5
{vex}   vpmadd52huq     64(%rsi),%ymm1,%ymm6
{vex}   vpmadd52huq     96(%rsi),%ymm1,%ymm7
{vex}   vpmadd52huq     128(%rsi),%ymm1,%ymm8

{vex}   vpmadd52huq     0(%rcx),%ymm2,%ymm3
{vex}   vpmadd52huq     32(%rcx),%ymm2,%ymm5
{vex}   vpmadd52huq     64(%rcx),%ymm2,%ymm6
{vex}   vpmadd52huq     96(%rcx),%ymm2,%ymm7
{vex}   vpmadd52huq     128(%rcx),%ymm2,%ymm8
        leaq    168(%rsp),%rsp
        movq    24(%r11),%r13

        vpbroadcastq    24(%r11),%ymm1
        movq    0(%rsi),%rdx
        mulxq   %r13,%r13,%r12
        addq    %r13,%r9
        movq    %r12,%r10
        adcq    $0,%r10

        movq    %r8,%r13
        imulq   %r9,%r13
        andq    %rax,%r13

        vmovq   %r13,%xmm2
        vpbroadcastq    %xmm2,%ymm2
        movq    0(%rcx),%rdx
        mulxq   %r13,%r13,%r12
        addq    %r13,%r9
        adcq    %r12,%r10

        shrq    $52,%r9
        salq    $12,%r10
        orq     %r10,%r9

        leaq    -168(%rsp),%rsp
{vex}   vpmadd52luq     0(%rsi),%ymm1,%ymm3
{vex}   vpmadd52luq     32(%rsi),%ymm1,%ymm5
{vex}   vpmadd52luq     64(%rsi),%ymm1,%ymm6
{vex}   vpmadd52luq     96(%rsi),%ymm1,%ymm7
{vex}   vpmadd52luq     128(%rsi),%ymm1,%ymm8

{vex}   vpmadd52luq     0(%rcx),%ymm2,%ymm3
{vex}   vpmadd52luq     32(%rcx),%ymm2,%ymm5
{vex}   vpmadd52luq     64(%rcx),%ymm2,%ymm6
{vex}   vpmadd52luq     96(%rcx),%ymm2,%ymm7
{vex}   vpmadd52luq     128(%rcx),%ymm2,%ymm8


        vmovdqu %ymm3,0(%rsp)
        vmovdqu %ymm5,32(%rsp)
        vmovdqu %ymm6,64(%rsp)
        vmovdqu %ymm7,96(%rsp)
        vmovdqu %ymm8,128(%rsp)
        movq    $0,160(%rsp)

        vmovdqu 8(%rsp),%ymm3
        vmovdqu 40(%rsp),%ymm5
        vmovdqu 72(%rsp),%ymm6
        vmovdqu 104(%rsp),%ymm7
        vmovdqu 136(%rsp),%ymm8

        addq    8(%rsp),%r9

{vex}   vpmadd52huq     0(%rsi),%ymm1,%ymm3
{vex}   vpmadd52huq     32(%rsi),%ymm1,%ymm5
{vex}   vpmadd52huq     64(%rsi),%ymm1,%ymm6
{vex}   vpmadd52huq     96(%rsi),%ymm1,%ymm7
{vex}   vpmadd52huq     128(%rsi),%ymm1,%ymm8

{vex}   vpmadd52huq     0(%rcx),%ymm2,%ymm3
{vex}   vpmadd52huq     32(%rcx),%ymm2,%ymm5
{vex}   vpmadd52huq     64(%rcx),%ymm2,%ymm6
{vex}   vpmadd52huq     96(%rcx),%ymm2,%ymm7
{vex}   vpmadd52huq     128(%rcx),%ymm2,%ymm8
        leaq    168(%rsp),%rsp
        leaq    32(%r11),%r11                   # advance b by four limbs
        decl    %ebx
        jne     .Lloop5

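# All 20 b-limbs are consumed. Fold the scalar carry limb (r9) into lane 0,
# then normalise the redundant representation: take each lane's bits above
# 52, rotate that carry vector up by one limb across the five registers,
# mask the limbs back to 52 bits and add the carries in.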
        vmovq   %r9,%xmm0
        vpbroadcastq    %xmm0,%ymm0
        vpblendd        $3,%ymm0,%ymm3,%ymm3



        vpsrlq  $52,%ymm3,%ymm0
        vpsrlq  $52,%ymm5,%ymm1
        vpsrlq  $52,%ymm6,%ymm2
        vpsrlq  $52,%ymm7,%ymm13
        vpsrlq  $52,%ymm8,%ymm14


        vpermq  $144,%ymm14,%ymm14
        vpermq  $3,%ymm13,%ymm15
        vblendpd        $1,%ymm15,%ymm14,%ymm14

        vpermq  $144,%ymm13,%ymm13
        vpermq  $3,%ymm2,%ymm15
        vblendpd        $1,%ymm15,%ymm13,%ymm13

        vpermq  $144,%ymm2,%ymm2
        vpermq  $3,%ymm1,%ymm15
        vblendpd        $1,%ymm15,%ymm2,%ymm2

        vpermq  $144,%ymm1,%ymm1
        vpermq  $3,%ymm0,%ymm15
        vblendpd        $1,%ymm15,%ymm1,%ymm1

        vpermq  $144,%ymm0,%ymm0
        vpand   .Lhigh64x3(%rip),%ymm0,%ymm0


        vpand   .Lmask52x4(%rip),%ymm3,%ymm3
        vpand   .Lmask52x4(%rip),%ymm5,%ymm5
        vpand   .Lmask52x4(%rip),%ymm6,%ymm6
        vpand   .Lmask52x4(%rip),%ymm7,%ymm7
        vpand   .Lmask52x4(%rip),%ymm8,%ymm8


        vpaddq  %ymm0,%ymm3,%ymm3
        vpaddq  %ymm1,%ymm5,%ymm5
        vpaddq  %ymm2,%ymm6,%ymm6
        vpaddq  %ymm13,%ymm7,%ymm7
        vpaddq  %ymm14,%ymm8,%ymm8



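# The additions above can leave limbs at or above 2^52. Build per-limb
# masks: "gt" lanes (limb > 2^52-1) generate a carry, "eq" lanes
# (limb == 2^52-1) propagate one; the byte-wide add/adc chain below computes
# carry-in = ((gt << 1) + eq) ^ eq across all 20 limbs at once.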
        vpcmpgtq        .Lmask52x4(%rip),%ymm3,%ymm0
        vpcmpgtq        .Lmask52x4(%rip),%ymm5,%ymm1
        vpcmpgtq        .Lmask52x4(%rip),%ymm6,%ymm2
        vpcmpgtq        .Lmask52x4(%rip),%ymm7,%ymm13
        vpcmpgtq        .Lmask52x4(%rip),%ymm8,%ymm14
        vmovmskpd       %ymm0,%r14d
        vmovmskpd       %ymm1,%r13d
        vmovmskpd       %ymm2,%r12d
        vmovmskpd       %ymm13,%r11d
        vmovmskpd       %ymm14,%r10d


        vpcmpeqq        .Lmask52x4(%rip),%ymm3,%ymm0
        vpcmpeqq        .Lmask52x4(%rip),%ymm5,%ymm1
        vpcmpeqq        .Lmask52x4(%rip),%ymm6,%ymm2
        vpcmpeqq        .Lmask52x4(%rip),%ymm7,%ymm13
        vpcmpeqq        .Lmask52x4(%rip),%ymm8,%ymm14
        vmovmskpd       %ymm0,%r9d
        vmovmskpd       %ymm1,%r8d
        vmovmskpd       %ymm2,%ebx
        vmovmskpd       %ymm13,%ecx
        vmovmskpd       %ymm14,%edx



        shlb    $4,%r13b
        orb     %r13b,%r14b
        shlb    $4,%r11b
        orb     %r11b,%r12b

        addb    %r14b,%r14b
        adcb    %r12b,%r12b
        adcb    %r10b,%r10b

        shlb    $4,%r8b
        orb     %r8b,%r9b
        shlb    $4,%cl
        orb     %cl,%bl

        addb    %r9b,%r14b
        adcb    %bl,%r12b
        adcb    %dl,%r10b

        xorb    %r9b,%r14b
        xorb    %bl,%r12b
        xorb    %dl,%r10b

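# Apply the carries: for each 4-bit group, fetch a 32-byte blend mask from
# .Lkmasklut and blend in limb - (2^52-1), i.e. limb + 1 - 2^52; the final
# vpand strips the spilt top bits, completing the conditional increment.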
        leaq    .Lkmasklut(%rip),%rdx

        movb    %r14b,%r13b
        andq    $0xf,%r14
        vpsubq  .Lmask52x4(%rip),%ymm3,%ymm0
        shlq    $5,%r14
        vmovapd (%rdx,%r14,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm3,%ymm3

        shrb    $4,%r13b
        andq    $0xf,%r13
        vpsubq  .Lmask52x4(%rip),%ymm5,%ymm0
        shlq    $5,%r13
        vmovapd (%rdx,%r13,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm5,%ymm5

        movb    %r12b,%r11b
        andq    $0xf,%r12
        vpsubq  .Lmask52x4(%rip),%ymm6,%ymm0
        shlq    $5,%r12
        vmovapd (%rdx,%r12,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm6,%ymm6

        shrb    $4,%r11b
        andq    $0xf,%r11
        vpsubq  .Lmask52x4(%rip),%ymm7,%ymm0
        shlq    $5,%r11
        vmovapd (%rdx,%r11,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm7,%ymm7

        andq    $0xf,%r10
        vpsubq  .Lmask52x4(%rip),%ymm8,%ymm0
        shlq    $5,%r10
        vmovapd (%rdx,%r10,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm8,%ymm8


        vpand   .Lmask52x4(%rip),%ymm3,%ymm3
        vpand   .Lmask52x4(%rip),%ymm5,%ymm5
        vpand   .Lmask52x4(%rip),%ymm6,%ymm6
        vpand   .Lmask52x4(%rip),%ymm7,%ymm7
        vpand   .Lmask52x4(%rip),%ymm8,%ymm8

        vmovdqu %ymm3,0(%rdi)
        vmovdqu %ymm5,32(%rdi)
        vmovdqu %ymm6,64(%rdi)
        vmovdqu %ymm7,96(%rdi)
        vmovdqu %ymm8,128(%rdi)

        vzeroupper
        movq    0(%rsp),%r15
.cfi_restore    %r15
        movq    8(%rsp),%r14
.cfi_restore    %r14
        movq    16(%rsp),%r13
.cfi_restore    %r13
        movq    24(%rsp),%r12
.cfi_restore    %r12
        movq    32(%rsp),%rbp
.cfi_restore    %rbp
        movq    40(%rsp),%rbx
.cfi_restore    %rbx
        leaq    48(%rsp),%rsp
.cfi_adjust_cfa_offset  -48
.Lossl_rsaz_amm52x20_x1_avxifma256_epilogue:
        .byte   0xf3,0xc3                       # repz ret
.cfi_endproc
.size   ossl_rsaz_amm52x20_x1_avxifma256, .-ossl_rsaz_amm52x20_x1_avxifma256
.section        .rodata
.align  32
.Lmask52x4:                                     # 2^52 - 1 in each lane
.quad   0xfffffffffffff
.quad   0xfffffffffffff
.quad   0xfffffffffffff
.quad   0xfffffffffffff
.Lhigh64x3:                                     # keep lanes 1-3, clear lane 0
.quad   0x0
.quad   0xffffffffffffffff
.quad   0xffffffffffffffff
.quad   0xffffffffffffffff
.Lkmasklut:                                     # 16 blend masks; entry i sets lane j iff bit j of i is set

.quad   0x0
.quad   0x0
.quad   0x0
.quad   0x0

.quad   0xffffffffffffffff
.quad   0x0
.quad   0x0
.quad   0x0

.quad   0x0
.quad   0xffffffffffffffff
.quad   0x0
.quad   0x0

.quad   0xffffffffffffffff
.quad   0xffffffffffffffff
.quad   0x0
.quad   0x0

.quad   0x0
.quad   0x0
.quad   0xffffffffffffffff
.quad   0x0

.quad   0xffffffffffffffff
.quad   0x0
.quad   0xffffffffffffffff
.quad   0x0

.quad   0x0
.quad   0xffffffffffffffff
.quad   0xffffffffffffffff
.quad   0x0

.quad   0xffffffffffffffff
.quad   0xffffffffffffffff
.quad   0xffffffffffffffff
.quad   0x0

.quad   0x0
.quad   0x0
.quad   0x0
.quad   0xffffffffffffffff

.quad   0xffffffffffffffff
.quad   0x0
.quad   0x0
.quad   0xffffffffffffffff

.quad   0x0
.quad   0xffffffffffffffff
.quad   0x0
.quad   0xffffffffffffffff

.quad   0xffffffffffffffff
.quad   0xffffffffffffffff
.quad   0x0
.quad   0xffffffffffffffff

.quad   0x0
.quad   0x0
.quad   0xffffffffffffffff
.quad   0xffffffffffffffff

.quad   0xffffffffffffffff
.quad   0x0
.quad   0xffffffffffffffff
.quad   0xffffffffffffffff

.quad   0x0
.quad   0xffffffffffffffff
.quad   0xffffffffffffffff
.quad   0xffffffffffffffff

.quad   0xffffffffffffffff
.quad   0xffffffffffffffff
.quad   0xffffffffffffffff
.quad   0xffffffffffffffff
.text

.globl  ossl_rsaz_amm52x20_x2_avxifma256
.type   ossl_rsaz_amm52x20_x2_avxifma256,@function
.align  32
ossl_rsaz_amm52x20_x2_avxifma256:
.cfi_startproc
.byte   243,15,30,250                           # endbr64
        pushq   %rbx
.cfi_adjust_cfa_offset  8
.cfi_offset     %rbx,-16
        pushq   %rbp
.cfi_adjust_cfa_offset  8
.cfi_offset     %rbp,-24
        pushq   %r12
.cfi_adjust_cfa_offset  8
.cfi_offset     %r12,-32
        pushq   %r13
.cfi_adjust_cfa_offset  8
.cfi_offset     %r13,-40
        pushq   %r14
.cfi_adjust_cfa_offset  8
.cfi_offset     %r14,-48
        pushq   %r15
.cfi_adjust_cfa_offset  8
.cfi_offset     %r15,-56
.Lossl_rsaz_amm52x20_x2_avxifma256_body:
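# ossl_rsaz_amm52x20_x2_avxifma256(res=rdi, a=rsi, b=rdx, m=rcx, k0=r8):
# two independent 20-limb AMMs interleaved in one pass (e.g. the two CRT
# halves of an RSA-2048 exponentiation). The second set of operands sits
# 160 bytes (20 limbs) past the first, r8 points at a pair of k0 values,
# and the second accumulator is ymm4,ymm9..ymm12 with its carry in r15.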


        vpxor   %ymm0,%ymm0,%ymm0
        vmovapd %ymm0,%ymm3
        vmovapd %ymm0,%ymm5
        vmovapd %ymm0,%ymm6
        vmovapd %ymm0,%ymm7
        vmovapd %ymm0,%ymm8
        vmovapd %ymm0,%ymm4
        vmovapd %ymm0,%ymm9
        vmovapd %ymm0,%ymm10
        vmovapd %ymm0,%ymm11
        vmovapd %ymm0,%ymm12

        xorl    %r9d,%r9d
        xorl    %r15d,%r15d

        movq    %rdx,%r11
        movq    $0xfffffffffffff,%rax

        movl    $20,%ebx

.align  32
.Lloop20:
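# 20 iterations, one limb of b per pass for each operand set: first the
# scalar+vector step for set 0 (carry in r9), then the same for set 1 (r15).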
        movq    0(%r11),%r13

        vpbroadcastq    0(%r11),%ymm1
        movq    0(%rsi),%rdx
        mulxq   %r13,%r13,%r12
        addq    %r13,%r9
        movq    %r12,%r10
        adcq    $0,%r10

        movq    (%r8),%r13                      # k0[0] (r8 is a pointer in the x2 variant)
        imulq   %r9,%r13
        andq    %rax,%r13

        vmovq   %r13,%xmm2
        vpbroadcastq    %xmm2,%ymm2
        movq    0(%rcx),%rdx
        mulxq   %r13,%r13,%r12
        addq    %r13,%r9
        adcq    %r12,%r10

        shrq    $52,%r9
        salq    $12,%r10
        orq     %r10,%r9

        leaq    -168(%rsp),%rsp
{vex}   vpmadd52luq     0(%rsi),%ymm1,%ymm3
{vex}   vpmadd52luq     32(%rsi),%ymm1,%ymm5
{vex}   vpmadd52luq     64(%rsi),%ymm1,%ymm6
{vex}   vpmadd52luq     96(%rsi),%ymm1,%ymm7
{vex}   vpmadd52luq     128(%rsi),%ymm1,%ymm8

{vex}   vpmadd52luq     0(%rcx),%ymm2,%ymm3
{vex}   vpmadd52luq     32(%rcx),%ymm2,%ymm5
{vex}   vpmadd52luq     64(%rcx),%ymm2,%ymm6
{vex}   vpmadd52luq     96(%rcx),%ymm2,%ymm7
{vex}   vpmadd52luq     128(%rcx),%ymm2,%ymm8


        vmovdqu %ymm3,0(%rsp)
        vmovdqu %ymm5,32(%rsp)
        vmovdqu %ymm6,64(%rsp)
        vmovdqu %ymm7,96(%rsp)
        vmovdqu %ymm8,128(%rsp)
        movq    $0,160(%rsp)

        vmovdqu 8(%rsp),%ymm3
        vmovdqu 40(%rsp),%ymm5
        vmovdqu 72(%rsp),%ymm6
        vmovdqu 104(%rsp),%ymm7
        vmovdqu 136(%rsp),%ymm8

        addq    8(%rsp),%r9

{vex}   vpmadd52huq     0(%rsi),%ymm1,%ymm3
{vex}   vpmadd52huq     32(%rsi),%ymm1,%ymm5
{vex}   vpmadd52huq     64(%rsi),%ymm1,%ymm6
{vex}   vpmadd52huq     96(%rsi),%ymm1,%ymm7
{vex}   vpmadd52huq     128(%rsi),%ymm1,%ymm8

{vex}   vpmadd52huq     0(%rcx),%ymm2,%ymm3
{vex}   vpmadd52huq     32(%rcx),%ymm2,%ymm5
{vex}   vpmadd52huq     64(%rcx),%ymm2,%ymm6
{vex}   vpmadd52huq     96(%rcx),%ymm2,%ymm7
{vex}   vpmadd52huq     128(%rcx),%ymm2,%ymm8
        leaq    168(%rsp),%rsp
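# Second operand set: the identical AMM step at +160-byte offsets with k0[1].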
        movq    160(%r11),%r13

        vpbroadcastq    160(%r11),%ymm1
        movq    160(%rsi),%rdx
        mulxq   %r13,%r13,%r12
        addq    %r13,%r15
        movq    %r12,%r10
        adcq    $0,%r10

        movq    8(%r8),%r13                     # k0[1] for the second operand set
        imulq   %r15,%r13
        andq    %rax,%r13

        vmovq   %r13,%xmm2
        vpbroadcastq    %xmm2,%ymm2
        movq    160(%rcx),%rdx
        mulxq   %r13,%r13,%r12
        addq    %r13,%r15
        adcq    %r12,%r10

        shrq    $52,%r15
        salq    $12,%r10
        orq     %r10,%r15

        leaq    -168(%rsp),%rsp
{vex}   vpmadd52luq     160(%rsi),%ymm1,%ymm4
{vex}   vpmadd52luq     192(%rsi),%ymm1,%ymm9
{vex}   vpmadd52luq     224(%rsi),%ymm1,%ymm10
{vex}   vpmadd52luq     256(%rsi),%ymm1,%ymm11
{vex}   vpmadd52luq     288(%rsi),%ymm1,%ymm12

{vex}   vpmadd52luq     160(%rcx),%ymm2,%ymm4
{vex}   vpmadd52luq     192(%rcx),%ymm2,%ymm9
{vex}   vpmadd52luq     224(%rcx),%ymm2,%ymm10
{vex}   vpmadd52luq     256(%rcx),%ymm2,%ymm11
{vex}   vpmadd52luq     288(%rcx),%ymm2,%ymm12


        vmovdqu %ymm4,0(%rsp)
        vmovdqu %ymm9,32(%rsp)
        vmovdqu %ymm10,64(%rsp)
        vmovdqu %ymm11,96(%rsp)
        vmovdqu %ymm12,128(%rsp)
        movq    $0,160(%rsp)

        vmovdqu 8(%rsp),%ymm4
        vmovdqu 40(%rsp),%ymm9
        vmovdqu 72(%rsp),%ymm10
        vmovdqu 104(%rsp),%ymm11
        vmovdqu 136(%rsp),%ymm12

        addq    8(%rsp),%r15

{vex}   vpmadd52huq     160(%rsi),%ymm1,%ymm4
{vex}   vpmadd52huq     192(%rsi),%ymm1,%ymm9
{vex}   vpmadd52huq     224(%rsi),%ymm1,%ymm10
{vex}   vpmadd52huq     256(%rsi),%ymm1,%ymm11
{vex}   vpmadd52huq     288(%rsi),%ymm1,%ymm12

{vex}   vpmadd52huq     160(%rcx),%ymm2,%ymm4
{vex}   vpmadd52huq     192(%rcx),%ymm2,%ymm9
{vex}   vpmadd52huq     224(%rcx),%ymm2,%ymm10
{vex}   vpmadd52huq     256(%rcx),%ymm2,%ymm11
{vex}   vpmadd52huq     288(%rcx),%ymm2,%ymm12
        leaq    168(%rsp),%rsp
        leaq    8(%r11),%r11                    # advance b by one limb (both sets are read at +0 and +160)
        decl    %ebx
        jne     .Lloop20

        vmovq   %r9,%xmm0
        vpbroadcastq    %xmm0,%ymm0
        vpblendd        $3,%ymm0,%ymm3,%ymm3



        vpsrlq  $52,%ymm3,%ymm0
        vpsrlq  $52,%ymm5,%ymm1
        vpsrlq  $52,%ymm6,%ymm2
        vpsrlq  $52,%ymm7,%ymm13
        vpsrlq  $52,%ymm8,%ymm14


        vpermq  $144,%ymm14,%ymm14
        vpermq  $3,%ymm13,%ymm15
        vblendpd        $1,%ymm15,%ymm14,%ymm14

        vpermq  $144,%ymm13,%ymm13
        vpermq  $3,%ymm2,%ymm15
        vblendpd        $1,%ymm15,%ymm13,%ymm13

        vpermq  $144,%ymm2,%ymm2
        vpermq  $3,%ymm1,%ymm15
        vblendpd        $1,%ymm15,%ymm2,%ymm2

        vpermq  $144,%ymm1,%ymm1
        vpermq  $3,%ymm0,%ymm15
        vblendpd        $1,%ymm15,%ymm1,%ymm1

        vpermq  $144,%ymm0,%ymm0
        vpand   .Lhigh64x3(%rip),%ymm0,%ymm0


        vpand   .Lmask52x4(%rip),%ymm3,%ymm3
        vpand   .Lmask52x4(%rip),%ymm5,%ymm5
        vpand   .Lmask52x4(%rip),%ymm6,%ymm6
        vpand   .Lmask52x4(%rip),%ymm7,%ymm7
        vpand   .Lmask52x4(%rip),%ymm8,%ymm8


        vpaddq  %ymm0,%ymm3,%ymm3
        vpaddq  %ymm1,%ymm5,%ymm5
        vpaddq  %ymm2,%ymm6,%ymm6
        vpaddq  %ymm13,%ymm7,%ymm7
        vpaddq  %ymm14,%ymm8,%ymm8



        vpcmpgtq        .Lmask52x4(%rip),%ymm3,%ymm0
        vpcmpgtq        .Lmask52x4(%rip),%ymm5,%ymm1
        vpcmpgtq        .Lmask52x4(%rip),%ymm6,%ymm2
        vpcmpgtq        .Lmask52x4(%rip),%ymm7,%ymm13
        vpcmpgtq        .Lmask52x4(%rip),%ymm8,%ymm14
        vmovmskpd       %ymm0,%r14d
        vmovmskpd       %ymm1,%r13d
        vmovmskpd       %ymm2,%r12d
        vmovmskpd       %ymm13,%r11d
        vmovmskpd       %ymm14,%r10d


        vpcmpeqq        .Lmask52x4(%rip),%ymm3,%ymm0
        vpcmpeqq        .Lmask52x4(%rip),%ymm5,%ymm1
        vpcmpeqq        .Lmask52x4(%rip),%ymm6,%ymm2
        vpcmpeqq        .Lmask52x4(%rip),%ymm7,%ymm13
        vpcmpeqq        .Lmask52x4(%rip),%ymm8,%ymm14
        vmovmskpd       %ymm0,%r9d
        vmovmskpd       %ymm1,%r8d
        vmovmskpd       %ymm2,%ebx
        vmovmskpd       %ymm13,%ecx
        vmovmskpd       %ymm14,%edx



        shlb    $4,%r13b
        orb     %r13b,%r14b
        shlb    $4,%r11b
        orb     %r11b,%r12b

        addb    %r14b,%r14b
        adcb    %r12b,%r12b
        adcb    %r10b,%r10b

        shlb    $4,%r8b
        orb     %r8b,%r9b
        shlb    $4,%cl
        orb     %cl,%bl

        addb    %r9b,%r14b
        adcb    %bl,%r12b
        adcb    %dl,%r10b

        xorb    %r9b,%r14b
        xorb    %bl,%r12b
        xorb    %dl,%r10b

        leaq    .Lkmasklut(%rip),%rdx

        movb    %r14b,%r13b
        andq    $0xf,%r14
        vpsubq  .Lmask52x4(%rip),%ymm3,%ymm0
        shlq    $5,%r14
        vmovapd (%rdx,%r14,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm3,%ymm3

        shrb    $4,%r13b
        andq    $0xf,%r13
        vpsubq  .Lmask52x4(%rip),%ymm5,%ymm0
        shlq    $5,%r13
        vmovapd (%rdx,%r13,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm5,%ymm5

        movb    %r12b,%r11b
        andq    $0xf,%r12
        vpsubq  .Lmask52x4(%rip),%ymm6,%ymm0
        shlq    $5,%r12
        vmovapd (%rdx,%r12,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm6,%ymm6

        shrb    $4,%r11b
        andq    $0xf,%r11
        vpsubq  .Lmask52x4(%rip),%ymm7,%ymm0
        shlq    $5,%r11
        vmovapd (%rdx,%r11,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm7,%ymm7

        andq    $0xf,%r10
        vpsubq  .Lmask52x4(%rip),%ymm8,%ymm0
        shlq    $5,%r10
        vmovapd (%rdx,%r10,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm8,%ymm8


        vpand   .Lmask52x4(%rip),%ymm3,%ymm3
        vpand   .Lmask52x4(%rip),%ymm5,%ymm5
        vpand   .Lmask52x4(%rip),%ymm6,%ymm6
        vpand   .Lmask52x4(%rip),%ymm7,%ymm7
        vpand   .Lmask52x4(%rip),%ymm8,%ymm8

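# Repeat the normalisation and carry fix-up for the second accumulator
# (ymm4,ymm9..ymm12), folding in the r15 carry limb.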
        vmovq   %r15,%xmm0
        vpbroadcastq    %xmm0,%ymm0
        vpblendd        $3,%ymm0,%ymm4,%ymm4



        vpsrlq  $52,%ymm4,%ymm0
        vpsrlq  $52,%ymm9,%ymm1
        vpsrlq  $52,%ymm10,%ymm2
        vpsrlq  $52,%ymm11,%ymm13
        vpsrlq  $52,%ymm12,%ymm14


        vpermq  $144,%ymm14,%ymm14
        vpermq  $3,%ymm13,%ymm15
        vblendpd        $1,%ymm15,%ymm14,%ymm14

        vpermq  $144,%ymm13,%ymm13
        vpermq  $3,%ymm2,%ymm15
        vblendpd        $1,%ymm15,%ymm13,%ymm13

        vpermq  $144,%ymm2,%ymm2
        vpermq  $3,%ymm1,%ymm15
        vblendpd        $1,%ymm15,%ymm2,%ymm2

        vpermq  $144,%ymm1,%ymm1
        vpermq  $3,%ymm0,%ymm15
        vblendpd        $1,%ymm15,%ymm1,%ymm1

        vpermq  $144,%ymm0,%ymm0
        vpand   .Lhigh64x3(%rip),%ymm0,%ymm0


        vpand   .Lmask52x4(%rip),%ymm4,%ymm4
        vpand   .Lmask52x4(%rip),%ymm9,%ymm9
        vpand   .Lmask52x4(%rip),%ymm10,%ymm10
        vpand   .Lmask52x4(%rip),%ymm11,%ymm11
        vpand   .Lmask52x4(%rip),%ymm12,%ymm12


        vpaddq  %ymm0,%ymm4,%ymm4
        vpaddq  %ymm1,%ymm9,%ymm9
        vpaddq  %ymm2,%ymm10,%ymm10
        vpaddq  %ymm13,%ymm11,%ymm11
        vpaddq  %ymm14,%ymm12,%ymm12



        vpcmpgtq        .Lmask52x4(%rip),%ymm4,%ymm0
        vpcmpgtq        .Lmask52x4(%rip),%ymm9,%ymm1
        vpcmpgtq        .Lmask52x4(%rip),%ymm10,%ymm2
        vpcmpgtq        .Lmask52x4(%rip),%ymm11,%ymm13
        vpcmpgtq        .Lmask52x4(%rip),%ymm12,%ymm14
        vmovmskpd       %ymm0,%r14d
        vmovmskpd       %ymm1,%r13d
        vmovmskpd       %ymm2,%r12d
        vmovmskpd       %ymm13,%r11d
        vmovmskpd       %ymm14,%r10d


        vpcmpeqq        .Lmask52x4(%rip),%ymm4,%ymm0
        vpcmpeqq        .Lmask52x4(%rip),%ymm9,%ymm1
        vpcmpeqq        .Lmask52x4(%rip),%ymm10,%ymm2
        vpcmpeqq        .Lmask52x4(%rip),%ymm11,%ymm13
        vpcmpeqq        .Lmask52x4(%rip),%ymm12,%ymm14
        vmovmskpd       %ymm0,%r9d
        vmovmskpd       %ymm1,%r8d
        vmovmskpd       %ymm2,%ebx
        vmovmskpd       %ymm13,%ecx
        vmovmskpd       %ymm14,%edx



        shlb    $4,%r13b
        orb     %r13b,%r14b
        shlb    $4,%r11b
        orb     %r11b,%r12b

        addb    %r14b,%r14b
        adcb    %r12b,%r12b
        adcb    %r10b,%r10b

        shlb    $4,%r8b
        orb     %r8b,%r9b
        shlb    $4,%cl
        orb     %cl,%bl

        addb    %r9b,%r14b
        adcb    %bl,%r12b
        adcb    %dl,%r10b

        xorb    %r9b,%r14b
        xorb    %bl,%r12b
        xorb    %dl,%r10b

        leaq    .Lkmasklut(%rip),%rdx

        movb    %r14b,%r13b
        andq    $0xf,%r14
        vpsubq  .Lmask52x4(%rip),%ymm4,%ymm0
        shlq    $5,%r14
        vmovapd (%rdx,%r14,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm4,%ymm4

        shrb    $4,%r13b
        andq    $0xf,%r13
        vpsubq  .Lmask52x4(%rip),%ymm9,%ymm0
        shlq    $5,%r13
        vmovapd (%rdx,%r13,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm9,%ymm9

        movb    %r12b,%r11b
        andq    $0xf,%r12
        vpsubq  .Lmask52x4(%rip),%ymm10,%ymm0
        shlq    $5,%r12
        vmovapd (%rdx,%r12,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm10,%ymm10

        shrb    $4,%r11b
        andq    $0xf,%r11
        vpsubq  .Lmask52x4(%rip),%ymm11,%ymm0
        shlq    $5,%r11
        vmovapd (%rdx,%r11,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm11,%ymm11

        andq    $0xf,%r10
        vpsubq  .Lmask52x4(%rip),%ymm12,%ymm0
        shlq    $5,%r10
        vmovapd (%rdx,%r10,1),%ymm2
        vblendvpd       %ymm2,%ymm0,%ymm12,%ymm12


        vpand   .Lmask52x4(%rip),%ymm4,%ymm4
        vpand   .Lmask52x4(%rip),%ymm9,%ymm9
        vpand   .Lmask52x4(%rip),%ymm10,%ymm10
        vpand   .Lmask52x4(%rip),%ymm11,%ymm11
        vpand   .Lmask52x4(%rip),%ymm12,%ymm12

        vmovdqu %ymm3,0(%rdi)
        vmovdqu %ymm5,32(%rdi)
        vmovdqu %ymm6,64(%rdi)
        vmovdqu %ymm7,96(%rdi)
        vmovdqu %ymm8,128(%rdi)

        vmovdqu %ymm4,160(%rdi)
        vmovdqu %ymm9,192(%rdi)
        vmovdqu %ymm10,224(%rdi)
        vmovdqu %ymm11,256(%rdi)
        vmovdqu %ymm12,288(%rdi)

        vzeroupper
        movq    0(%rsp),%r15
.cfi_restore    %r15
        movq    8(%rsp),%r14
.cfi_restore    %r14
        movq    16(%rsp),%r13
.cfi_restore    %r13
        movq    24(%rsp),%r12
.cfi_restore    %r12
        movq    32(%rsp),%rbp
.cfi_restore    %rbp
        movq    40(%rsp),%rbx
.cfi_restore    %rbx
        leaq    48(%rsp),%rsp
.cfi_adjust_cfa_offset  -48
.Lossl_rsaz_amm52x20_x2_avxifma256_epilogue:
        .byte   0xf3,0xc3                       # repz ret
.cfi_endproc
.size   ossl_rsaz_amm52x20_x2_avxifma256, .-ossl_rsaz_amm52x20_x2_avxifma256
.text

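# ossl_extract_multiplier_2x20_win5_avx(out=rdi, table=rsi, idx1=rdx, idx2=rcx):
# constant-time gather of two 20-limb multipliers from a window-5 table of
# 32 entries x 320 bytes (10240 bytes total). Every entry is read and
# conditionally blended in, so the memory access pattern is independent of
# the secret indices.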
.align  32
.globl  ossl_extract_multiplier_2x20_win5_avx
.type   ossl_extract_multiplier_2x20_win5_avx,@function
ossl_extract_multiplier_2x20_win5_avx:
.cfi_startproc
.byte   243,15,30,250                           # endbr64
        vmovapd .Lones(%rip),%ymm14             # per-iteration counter increment
        vmovq   %rdx,%xmm10
        vpbroadcastq    %xmm10,%ymm12           # ymm12 = idx1 in all lanes
        vmovq   %rcx,%xmm10
        vpbroadcastq    %xmm10,%ymm13           # ymm13 = idx2 in all lanes
        leaq    10240(%rsi),%rax                # table end: 32 entries x 320 bytes


        vpxor   %xmm0,%xmm0,%xmm0
        vmovapd %ymm0,%ymm11
        vmovapd %ymm0,%ymm1
        vmovapd %ymm0,%ymm2
        vmovapd %ymm0,%ymm3
        vmovapd %ymm0,%ymm4
        vmovapd %ymm0,%ymm5
        vmovapd %ymm0,%ymm6
        vmovapd %ymm0,%ymm7
        vmovapd %ymm0,%ymm8
        vmovapd %ymm0,%ymm9

.align  32
.Lloop:
        vpcmpeqq        %ymm11,%ymm12,%ymm15    # all-ones lanes iff counter == idx1
        vmovdqu 0(%rsi),%ymm10
        vblendvpd       %ymm15,%ymm10,%ymm0,%ymm0
        vmovdqu 32(%rsi),%ymm10
        vblendvpd       %ymm15,%ymm10,%ymm1,%ymm1
        vmovdqu 64(%rsi),%ymm10
        vblendvpd       %ymm15,%ymm10,%ymm2,%ymm2
        vmovdqu 96(%rsi),%ymm10
        vblendvpd       %ymm15,%ymm10,%ymm3,%ymm3
        vmovdqu 128(%rsi),%ymm10
        vblendvpd       %ymm15,%ymm10,%ymm4,%ymm4
        vpcmpeqq        %ymm11,%ymm13,%ymm15    # switch mask to idx2 for the second half
        vmovdqu 160(%rsi),%ymm10
        vblendvpd       %ymm15,%ymm10,%ymm5,%ymm5
        vmovdqu 192(%rsi),%ymm10
        vblendvpd       %ymm15,%ymm10,%ymm6,%ymm6
        vmovdqu 224(%rsi),%ymm10
        vblendvpd       %ymm15,%ymm10,%ymm7,%ymm7
        vmovdqu 256(%rsi),%ymm10
        vblendvpd       %ymm15,%ymm10,%ymm8,%ymm8
        vmovdqu 288(%rsi),%ymm10
        vblendvpd       %ymm15,%ymm10,%ymm9,%ymm9
        vpaddq  %ymm14,%ymm11,%ymm11            # counter += 1
        addq    $320,%rsi                       # next table entry
        cmpq    %rsi,%rax
        jne     .Lloop
        vmovdqu %ymm0,0(%rdi)
        vmovdqu %ymm1,32(%rdi)
        vmovdqu %ymm2,64(%rdi)
        vmovdqu %ymm3,96(%rdi)
        vmovdqu %ymm4,128(%rdi)
        vmovdqu %ymm5,160(%rdi)
        vmovdqu %ymm6,192(%rdi)
        vmovdqu %ymm7,224(%rdi)
        vmovdqu %ymm8,256(%rdi)
        vmovdqu %ymm9,288(%rdi)
        .byte   0xf3,0xc3                       # repz ret
.cfi_endproc
.size   ossl_extract_multiplier_2x20_win5_avx, .-ossl_extract_multiplier_2x20_win5_avx
.section        .rodata
.align  32
.Lones:
.quad   1,1,1,1
.Lzeros:
.quad   0,0,0,0
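# GNU property note advertising Intel CET support (IBT and shadow stack),
# matching the endbr64 marker at each entry point above.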
        .section ".note.gnu.property", "a"
        .p2align 3
        .long 1f - 0f
        .long 4f - 1f
        .long 5
0:
        # "GNU" encoded with .byte, since .asciz isn't supported
        # on Solaris.
        .byte 0x47
        .byte 0x4e
        .byte 0x55
        .byte 0
1:
        .p2align 3
        .long 0xc0000002
        .long 3f - 2f
2:
        .long 3
3:
        .p2align 3
4:
